Setup

library(spacc)
library(data.table)
library(ggplot2)

data_path <- "../data/"

1. Load and Prepare Data

header  <- fread(file.path(data_path, "austria_header.csv"))
species <- fread(file.path(data_path, "austria_species.csv"))

cat(sprintf("Plots: %d\nSpecies records: %d\nUnique species: %d\n",
            nrow(header), nrow(species), uniqueN(species$WFO_TAXON)))
## Plots: 52526
## Species records: 852075
## Unique species: 2700
species[, .N, by = STATUS]
##    STATUS      N
##    <char>  <int>
## 1: native 826055
## 2:    neo  26020

Build species matrices

Split by native/alien status and create separate site-by-species matrices sharing the same row order. We also assign each plot to a decade for temporal analysis.

rds_mats <- file.path(comp_dir, "matrices.rds")

# Keep only plots present in both header and species
common_ids <- intersect(header$PlotObservationID, species$PlotObservationID)
header <- header[PlotObservationID %in% common_ids]

# Assign decade
header[, decade := ifelse(Year < 1970, "pre-1970",
                          paste0(floor(Year / 10) * 10, "s"))]

# Coordinates
coords <- header[, .(x = Longitude, y = Latitude)]

# Helper: long-form species table -> wide presence/absence matrix
make_pa_matrix <- function(sp_data, plot_ids) {
  sp_data <- sp_data[PlotObservationID %in% plot_ids]
  wide <- dcast(sp_data, PlotObservationID ~ WFO_TAXON,
                fun.aggregate = function(x) as.integer(length(x) > 0),
                value.var = "WFO_TAXON")
  wide <- wide[match(plot_ids, PlotObservationID)]
  mat <- as.matrix(wide[, -1])
  mat[is.na(mat)] <- 0L
  rownames(mat) <- wide$PlotObservationID
  mat
}

if (file.exists(rds_mats)) {
  cat("Loading cached matrices...\n")
  mats <- readRDS(rds_mats)
  native_mat <- mats$native; alien_mat <- mats$alien; all_mat <- mats$all
  rm(mats)
} else {
  plot_ids <- header$PlotObservationID
  native_mat <- make_pa_matrix(species[STATUS == "native"], plot_ids)
  alien_mat  <- make_pa_matrix(species[STATUS == "neo"],    plot_ids)
  all_mat    <- make_pa_matrix(species, plot_ids)
  saveRDS(list(native = native_mat, alien = alien_mat, all = all_mat), rds_mats)
}
## Loading cached matrices...
cat(sprintf("Native species: %d\nAlien species:  %d\nAll species:    %d\n",
            ncol(native_mat), ncol(alien_mat), ncol(all_mat)))
## Native species: 2378
## Alien species:  323
## All species:    2700
# Decade summary
cat("\nPlots per decade:\n")
## 
## Plots per decade:
print(header[, .N, by = decade][order(decade)])
##      decade     N
##      <char> <int>
## 1:    1970s  2386
## 2:    1980s  3171
## 3:    1990s  8814
## 4:    2000s  4992
## 5:    2010s  8285
## 6:    2020s   104
## 7: pre-1970  2105

2. Temporal Accumulation Curves (per decade)

By splitting into decades we get smaller matrices (1K-9K sites each), enabling high-seed-count runs with temporal resolution.

decades <- sort(unique(header$decade))

rds_decade <- file.path(comp_dir, "decade_results.rds")
if (file.exists(rds_decade)) {
  cat("Loading cached decade results...\n")
  decade_results <- readRDS(rds_decade)
} else {
  decade_results <- lapply(decades, function(d) {
    idx <- which(header$decade == d)
    n <- length(idx)
    cat(sprintf("  %s: %d sites\n", d, n))
    spacc(all_mat[idx, , drop = FALSE], coords[idx, ],
          n_seeds = min(100, n), method = "knn",
          distance = "haversine", seed = 42)
  })
  names(decade_results) <- decades
  saveRDS(decade_results, rds_decade)
}
## Loading cached decade results...
decade_combined <- do.call(c, decade_results)
plot(decade_combined) + ggtitle("Spatial accumulation by decade")


3. Native vs Alien (per decade)

Same spatial walks, different species tallies. Using the groups argument for efficiency.

status_vec <- species[match(colnames(all_mat), WFO_TAXON), STATUS]

rds_decade_na <- file.path(comp_dir, "decade_na_results.rds")
if (file.exists(rds_decade_na)) {
  cat("Loading cached decade native/alien results...\n")
  decade_na_results <- readRDS(rds_decade_na)
} else {
  decade_na_results <- lapply(decades, function(d) {
    idx <- which(header$decade == d)
    n <- length(idx)
    spacc(all_mat[idx, , drop = FALSE], coords[idx, ],
          n_seeds = min(100, n), method = "knn",
          distance = "haversine", groups = status_vec, seed = 42)
  })
  names(decade_na_results) <- decades
  saveRDS(decade_na_results, rds_decade_na)
}
## Loading cached decade native/alien results...
for (d in decades) {
  print(plot_grouped_normalized(decade_na_results[[d]],
          title = sprintf("%s: Native vs Alien (normalized)", d)))
}


4. Pooled Analysis (all decades combined)

For overall comparisons we use the full dataset with moderate seed counts.

rds_pooled <- file.path(comp_dir, "pooled.rds")
if (file.exists(rds_pooled)) {
  cat("Loading cached pooled results...\n")
  p <- readRDS(rds_pooled)
  sac_native <- p$native; sac_alien <- p$alien; sac_all <- p$all
  rm(p)
} else {
  sac_native <- spacc(native_mat, coords, n_seeds = 30, method = "knn",
                      distance = "haversine", seed = 42)
  sac_alien  <- spacc(alien_mat,  coords, n_seeds = 30, method = "knn",
                      distance = "haversine", seed = 42)
  sac_all    <- spacc(all_mat,    coords, n_seeds = 30, method = "knn",
                      distance = "haversine", seed = 42)
  saveRDS(list(native = sac_native, alien = sac_alien, all = sac_all), rds_pooled)
}
## Loading cached pooled results...
plot_normalized(Native = sac_native, Alien = sac_alien,
                title = "Pooled: Native vs Alien (normalized)")

Compare native vs alien statistically

rds_compare <- file.path(comp_dir, "compare.rds")
if (file.exists(rds_compare)) {
  comp <- readRDS(rds_compare)
} else {
  comp <- compare(sac_native, sac_alien, method = "permutation", n_perm = 999)
  saveRDS(comp, rds_compare)
}
print(comp)
## Comparison: sac_native vs sac_alien 
## ---------------------------------------- 
## Method: permutation (n=999)
## AUC difference: 50036933.1 (p = 0.000***)
## Saturation: sac_native at 18661 sites, sac_alien at 24060 sites
## 
## sac_native saturates faster.
plot(comp)


5. Species Richness Heatmaps

Map per-site species richness across Austria to reveal spatial hotspots. These are the real data equivalent of the simulated heatmaps from Day 2 theory.

library(sf)
library(rnaturalearth)

austria_sf <- ne_countries(scale = "medium", country = "Austria", returnclass = "sf")

# Per-site richness from the matrices
site_richness <- data.frame(
  x = coords$x,
  y = coords$y,
  native  = rowSums(native_mat),
  alien   = rowSums(alien_mat),
  total   = rowSums(all_mat)
)
site_richness$alien_pct <- site_richness$alien / site_richness$total * 100

# Native heatmap
p_native <- ggplot() +
  geom_sf(data = austria_sf, fill = "grey95", color = "#666666", linewidth = 0.8) +
  geom_point(data = site_richness, aes(x = x, y = y, color = native),
             size = 0.4, alpha = 0.6) +
  geom_sf(data = austria_sf, fill = NA, color = "#333333", linewidth = 1) +
  scale_color_gradient(low = "#E8F5E9", high = "#1B5E20", name = "Native\nSpecies") +
  labs(title = "Native Species Richness per Plot",
       x = "Longitude", y = "Latitude") +
  theme_minimal(base_size = 12) +
  coord_sf(xlim = c(9.3, 17.2), ylim = c(46.3, 49.1))

# Alien heatmap
p_alien <- ggplot() +
  geom_sf(data = austria_sf, fill = "grey95", color = "#666666", linewidth = 0.8) +
  geom_point(data = site_richness, aes(x = x, y = y, color = alien),
             size = 0.4, alpha = 0.6) +
  geom_sf(data = austria_sf, fill = NA, color = "#333333", linewidth = 1) +
  scale_color_gradient(low = "#FFEBEE", high = "#B71C1C", name = "Alien\nSpecies") +
  labs(title = "Alien Species Richness per Plot",
       x = "Longitude", y = "Latitude") +
  theme_minimal(base_size = 12) +
  coord_sf(xlim = c(9.3, 17.2), ylim = c(46.3, 49.1))

gridExtra::grid.arrange(p_native, p_alien, ncol = 1)

# Proportion alien map
ggplot() +
  geom_sf(data = austria_sf, fill = "grey95", color = "#666666", linewidth = 0.8) +
  geom_point(data = site_richness, aes(x = x, y = y, color = alien_pct),
             size = 0.4, alpha = 0.6) +
  geom_sf(data = austria_sf, fill = NA, color = "#333333", linewidth = 1) +
  scale_color_gradient2(low = "#1B5E20", mid = "#FFF9C4", high = "#B71C1C",
                        midpoint = median(site_richness$alien_pct, na.rm = TRUE),
                        name = "Alien %") +
  labs(title = "Alien Species as % of Total Richness",
       x = "Longitude", y = "Latitude") +
  theme_minimal(base_size = 12) +
  coord_sf(xlim = c(9.3, 17.2), ylim = c(46.3, 49.1))


6. Detecting Invasion Hotspots via Starting Points

Accumulation curves reveal where species are concentrated. By starting spatial walks from different regions, we can detect invasion hotspots: if aliens accumulate faster when starting near cities, that confirms urban areas are introduction points. Conversely, native species should accumulate faster from alpine starts. We pick seeds from three regions: the Vienna lowlands (east, urban), the Alps (west, natural), and central Austria.

# Define three geographic regions
regions <- list(
  "Vienna & East" = list(lon = c(15.5, 17.0), lat = c(47.5, 48.5), color = "#D32F2F"),
  "Alps (West)"   = list(lon = c(10.0, 12.5), lat = c(46.5, 47.5), color = "#1976D2"),
  "Center"        = list(lon = c(13.0, 15.0), lat = c(47.5, 48.5), color = "#7B1FA2")
)

# Find sites in each region
region_indices <- lapply(regions, function(r) {
  which(coords$x >= r$lon[1] & coords$x <= r$lon[2] &
        coords$y >= r$lat[1] & coords$y <= r$lat[2])
})

# Map showing regions and plot locations
region_df <- do.call(rbind, lapply(names(regions), function(nm) {
  idx <- region_indices[[nm]]
  data.frame(x = coords$x[idx], y = coords$y[idx], region = nm)
}))

cat("Sites per region:\n")
## Sites per region:
for (nm in names(regions)) cat(sprintf("  %s: %d sites\n", nm, length(region_indices[[nm]])))
##   Vienna & East: 10810 sites
##   Alps (West): 1732 sites
##   Center: 5871 sites
ggplot() +
  geom_sf(data = austria_sf, fill = "grey95", color = "#666666", linewidth = 0.8) +
  geom_point(data = data.frame(x = coords$x, y = coords$y),
             aes(x = x, y = y), color = "grey80", size = 0.2, alpha = 0.3) +
  geom_point(data = region_df, aes(x = x, y = y, color = region), size = 0.8, alpha = 0.6) +
  geom_sf(data = austria_sf, fill = NA, color = "#333333", linewidth = 1) +
  scale_color_manual(values = c("Vienna & East" = "#D32F2F",
                                 "Alps (West)" = "#1976D2",
                                 "Center" = "#7B1FA2")) +
  labs(title = "Starting Regions for Invasion Hotspot Detection",
       subtitle = "Where you start reveals where species are concentrated",
       x = "Longitude", y = "Latitude", color = "Region") +
  theme_minimal(base_size = 12) +
  coord_sf(xlim = c(9.3, 17.2), ylim = c(46.3, 49.1))

rds_startpoint <- file.path(comp_dir, "startpoint.rds")
if (file.exists(rds_startpoint)) {
  sp_results <- readRDS(rds_startpoint)
} else {
  n_seeds_per_region <- 5
  set.seed(42)
  nc <- parallel::detectCores(logical = FALSE)

  sp_results <- lapply(names(regions), function(nm) {
    idx <- region_indices[[nm]]
    # Sample seed sites (0-based for C++)
    seeds <- sample(idx, min(n_seeds_per_region, length(idx)), replace = FALSE) - 1L

    # Run from these specific seeds on ALL sites (full dataset kNN walk)
    curves_all    <- spacc:::cpp_knn_kdtree_parallel_seeds(
      all_mat, coords$x, coords$y, seeds, n_cores = nc)
    curves_native <- spacc:::cpp_knn_kdtree_parallel_seeds(
      native_mat, coords$x, coords$y, seeds, n_cores = nc)
    curves_alien  <- spacc:::cpp_knn_kdtree_parallel_seeds(
      alien_mat, coords$x, coords$y, seeds, n_cores = nc)

    list(all = curves_all, native = curves_native, alien = curves_alien,
         seeds = seeds, n_seeds = length(seeds))
  })
  names(sp_results) <- names(regions)
  saveRDS(sp_results, rds_startpoint)
}

# Plot: all-species curves by starting region
curves_df <- do.call(rbind, lapply(names(sp_results), function(nm) {
  mat <- sp_results[[nm]]$all
  n <- ncol(mat)
  total <- max(mat)
  data.frame(
    sites = seq_len(n),
    mean  = colMeans(mat) / total,
    region = nm
  )
}))
curves_df$region <- factor(curves_df$region, levels = names(regions))

p1 <- ggplot(curves_df, aes(x = sites, y = mean, color = region)) +
  geom_line(linewidth = 1) +
  scale_color_manual(values = c("Vienna & East" = "#D32F2F",
                                 "Alps (West)" = "#1976D2",
                                 "Center" = "#7B1FA2")) +
  scale_y_continuous(labels = scales::percent) +
  labs(title = "All Species: Curves by Starting Region",
       x = "Sites sampled", y = "Proportion of total species",
       color = "Start Region") +
  theme_minimal(base_size = 12)

# Plot: native curves by starting region
native_df <- do.call(rbind, lapply(names(sp_results), function(nm) {
  mat <- sp_results[[nm]]$native
  n <- ncol(mat)
  total <- max(mat)
  data.frame(
    sites = seq_len(n),
    mean  = colMeans(mat) / total,
    region = nm
  )
}))
native_df$region <- factor(native_df$region, levels = names(regions))

p2 <- ggplot(native_df, aes(x = sites, y = mean, color = region)) +
  geom_line(linewidth = 1) +
  scale_color_manual(values = c("Vienna & East" = "#D32F2F",
                                 "Alps (West)" = "#1976D2",
                                 "Center" = "#7B1FA2")) +
  scale_y_continuous(labels = scales::percent) +
  labs(title = "Native Species: Curves by Starting Region",
       x = "Sites sampled", y = "Proportion of total native species",
       color = "Start Region") +
  theme_minimal(base_size = 12)

# Plot: alien curves by starting region
alien_df <- do.call(rbind, lapply(names(sp_results), function(nm) {
  mat <- sp_results[[nm]]$alien
  n <- ncol(mat)
  total <- max(mat)
  data.frame(
    sites = seq_len(n),
    mean  = colMeans(mat) / total,
    region = nm
  )
}))
alien_df$region <- factor(alien_df$region, levels = names(regions))

p3 <- ggplot(alien_df, aes(x = sites, y = mean, color = region)) +
  geom_line(linewidth = 1) +
  scale_color_manual(values = c("Vienna & East" = "#D32F2F",
                                 "Alps (West)" = "#1976D2",
                                 "Center" = "#7B1FA2")) +
  scale_y_continuous(labels = scales::percent) +
  labs(title = "Alien Species: Curves by Starting Region",
       x = "Sites sampled", y = "Proportion of total alien species",
       color = "Start Region") +
  theme_minimal(base_size = 12)

gridExtra::grid.arrange(p1, p2, p3, ncol = 1)

# Zoom into first 1000 sites to see early differences clearly
n_zoom <- min(1000, ncol(sp_results[[1]]$all))

early_df <- do.call(rbind, lapply(names(sp_results), function(nm) {
  mat_n <- sp_results[[nm]]$native
  mat_a <- sp_results[[nm]]$alien
  total_n <- max(mat_n)
  total_a <- max(mat_a)
  data.frame(
    sites = rep(seq_len(n_zoom), 2),
    mean  = c(colMeans(mat_n)[1:n_zoom] / total_n,
              colMeans(mat_a)[1:n_zoom] / total_a),
    group = rep(c("Native", "Alien"), each = n_zoom),
    region = nm
  )
}))
early_df$label <- paste(early_df$region, "-", early_df$group)
early_df$region <- factor(early_df$region, levels = names(regions))

ggplot(early_df, aes(x = sites, y = mean, color = region, linetype = group)) +
  geom_line(linewidth = 0.8) +
  scale_color_manual(values = c("Vienna & East" = "#D32F2F",
                                 "Alps (West)" = "#1976D2",
                                 "Center" = "#7B1FA2")) +
  scale_linetype_manual(values = c("Native" = "solid", "Alien" = "dashed")) +
  scale_y_continuous(labels = scales::percent) +
  labs(title = sprintf("Early Accumulation (first %d sites): Native vs Alien by Region", n_zoom),
       subtitle = "Faster alien accumulation from Vienna confirms urban invasion hotspots",
       x = "Sites sampled", y = "Proportion of total species",
       color = "Start Region", linetype = "Species Group") +
  theme_minimal(base_size = 12)


7. Extrapolation

Fit asymptotic models to estimate total richness for each group.

rds_extrap <- file.path(comp_dir, "extrapolate.rds")
if (file.exists(rds_extrap)) {
  e <- readRDS(rds_extrap)
  fit_native <- e$native; fit_alien <- e$alien
  rm(e)
} else {
  fit_native <- extrapolate(sac_native, model = "michaelis-menten")
  fit_alien  <- extrapolate(sac_alien,  model = "michaelis-menten")
  saveRDS(list(native = fit_native, alien = fit_alien), rds_extrap)
}

cat("Native:\n"); print(fit_native)
## Native:
## Extrapolation: michaelis-menten 
## ------------------------------ 
## Estimated asymptote: 2525.4 species
## 95% CI: 2522.4 - 2528.5
## AIC: 354656.5
## Observed: 2378.0 species (94% of estimated)
cat("\nAlien:\n");  print(fit_alien)
## 
## Alien:
## Extrapolation: michaelis-menten 
## ------------------------------ 
## Estimated asymptote: 410.9 species
## 95% CI: 410.0 - 411.8
## AIC: 233579.6
## Observed: 323.0 species (79% of estimated)
plot(fit_native)

plot(fit_alien)


7b. Per-Hexagon Estimated Richness Maps (SAC-based)

For each hexagon in an equal-area grid, run a spatial accumulation with 10 seeds on the within-hex plots, then fit Michaelis-Menten to estimate total species richness (S_max). This gives estimated richness per hexagon rather than raw observed counts.

library(hexify)
library(sf)
library(rnaturalearth)

austria_sf <- ne_countries(scale = "medium", country = "Austria", returnclass = "sf")
grid <- hex_grid(area_km2 = 750)

# Assign plots to hexagons using hexify()
hx <- hexify(data.frame(lon = header$Longitude, lat = header$Latitude), grid = grid)
plot_hex <- data.table(
  PlotObservationID = header$PlotObservationID,
  lon = header$Longitude,
  lat = header$Latitude,
  cell_id = hx[["cell_id"]]
)

# Count plots per hex
hex_counts <- plot_hex[, .N, by = cell_id]
# Keep hexagons with >= 20 plots (need enough for meaningful SAC + extrapolation)
keep_cells <- hex_counts[N >= 20]$cell_id
cat(sprintf("Hexagons with >= 20 plots: %d (out of %d)\n",
            length(keep_cells), nrow(hex_counts)))
## Hexagons with >= 20 plots: 118 (out of 130)
rds_hex_sac <- file.path(comp_dir, "hex_sac_estimates.rds")

if (file.exists(rds_hex_sac)) {
  cat("Loading cached per-hexagon SAC estimates...\n")
  hex_estimates <- readRDS(rds_hex_sac)
} else {
  hex_estimates <- rbindlist(lapply(keep_cells, function(cid) {
    idx <- which(plot_hex$cell_id == cid)
    n <- length(idx)

    # Run spacc within this hexagon
    result <- tryCatch({
      sac_nat <- spacc(native_mat[idx, , drop = FALSE],
                       coords[idx, ], n_seeds = 10, method = "knn",
                       distance = "haversine", seed = 42)
      sac_ali <- spacc(alien_mat[idx, , drop = FALSE],
                       coords[idx, ], n_seeds = 10, method = "knn",
                       distance = "haversine", seed = 42)
      sac_tot <- spacc(all_mat[idx, , drop = FALSE],
                       coords[idx, ], n_seeds = 10, method = "knn",
                       distance = "haversine", seed = 42)

      # Extrapolate Michaelis-Menten
      fit_nat <- tryCatch(extrapolate(sac_nat, model = "michaelis-menten"),
                          error = function(e) NULL)
      fit_ali <- tryCatch(extrapolate(sac_ali, model = "michaelis-menten"),
                          error = function(e) NULL)
      fit_tot <- tryCatch(extrapolate(sac_tot, model = "michaelis-menten"),
                          error = function(e) NULL)

      # Observed species counts
      obs_native <- ncol(native_mat[idx, colSums(native_mat[idx, , drop = FALSE]) > 0,
                                     drop = FALSE])
      obs_alien  <- ncol(alien_mat[idx, colSums(alien_mat[idx, , drop = FALSE]) > 0,
                                    drop = FALSE])
      obs_total  <- ncol(all_mat[idx, colSums(all_mat[idx, , drop = FALSE]) > 0,
                                  drop = FALSE])

      data.table(
        cell_id     = cid,
        n_plots     = n,
        obs_native  = obs_native,
        obs_alien   = obs_alien,
        obs_total   = obs_total,
        est_native  = if (!is.null(fit_nat)) fit_nat$asymptote else NA_real_,
        est_alien   = if (!is.null(fit_ali)) fit_ali$asymptote else NA_real_,
        est_total   = if (!is.null(fit_tot)) fit_tot$asymptote else NA_real_
      )
    }, error = function(e) {
      data.table(cell_id = cid, n_plots = n,
                 obs_native = NA, obs_alien = NA, obs_total = NA,
                 est_native = NA, est_alien = NA, est_total = NA)
    })
    cat(sprintf("  hex %s: %d plots, est_native=%.0f, est_alien=%.0f\n",
                cid, n,
                result$est_native, result$est_alien))
    result
  }))
  saveRDS(hex_estimates, rds_hex_sac)
}
## Loading cached per-hexagon SAC estimates...
cat(sprintf("Estimated richness for %d hexagons\n", nrow(hex_estimates)))
## Estimated richness for 118 hexagons
cat(sprintf("  Native range: %.0f - %.0f\n",
            min(hex_estimates$est_native, na.rm = TRUE),
            max(hex_estimates$est_native, na.rm = TRUE)))
##   Native range: 270 - 3614
cat(sprintf("  Alien range:  %.0f - %.0f\n",
            min(hex_estimates$est_alien, na.rm = TRUE),
            max(hex_estimates$est_alien, na.rm = TRUE)))
##   Alien range:  2 - 872
hex_estimates[, completeness_native := obs_native / est_native * 100]
hex_estimates[, completeness_alien  := obs_alien / est_alien * 100]
hex_estimates[, completeness_total  := obs_total / est_total * 100]

cat(sprintf("Median completeness: native=%.1f%%, alien=%.1f%%\n",
            median(hex_estimates$completeness_native, na.rm = TRUE),
            median(hex_estimates$completeness_alien, na.rm = TRUE)))
## Median completeness: native=64.8%, alien=56.8%
hex_polys <- cell_to_sf(hex_estimates$cell_id, grid = grid)
hex_polys <- merge(hex_polys, hex_estimates, by = "cell_id")

# Estimated native richness
p1 <- ggplot() +
  geom_sf(data = austria_sf, fill = "grey95", color = "#333333", linewidth = 0.8) +
  geom_sf(data = hex_polys, aes(fill = est_native), color = "white", linewidth = 0.3) +
  geom_sf(data = austria_sf, fill = NA, color = "#333333", linewidth = 1) +
  scale_fill_gradient(low = "white", high = "#2E7D32",
                      name = "Estimated\nnative S_max") +
  labs(title = "Estimated Native Species Richness (Michaelis-Menten S_max per hexagon)",
       x = "Longitude", y = "Latitude") +
  theme_minimal(base_size = 12) +
  coord_sf(xlim = c(9.3, 17.2), ylim = c(46.3, 49.1))

# Estimated alien richness
p2 <- ggplot() +
  geom_sf(data = austria_sf, fill = "grey95", color = "#333333", linewidth = 0.8) +
  geom_sf(data = hex_polys, aes(fill = est_alien), color = "white", linewidth = 0.3) +
  geom_sf(data = austria_sf, fill = NA, color = "#333333", linewidth = 1) +
  scale_fill_gradient(low = "white", high = "#D32F2F",
                      name = "Estimated\nalien S_max") +
  labs(title = "Estimated Alien Species Richness (Michaelis-Menten S_max per hexagon)",
       x = "Longitude", y = "Latitude") +
  theme_minimal(base_size = 12) +
  coord_sf(xlim = c(9.3, 17.2), ylim = c(46.3, 49.1))

# Completeness: % observed / estimated
p3 <- ggplot() +
  geom_sf(data = austria_sf, fill = "grey95", color = "#333333", linewidth = 0.8) +
  geom_sf(data = hex_polys, aes(fill = completeness_alien), color = "white", linewidth = 0.3) +
  geom_sf(data = austria_sf, fill = NA, color = "#333333", linewidth = 1) +
  scale_fill_distiller(palette = "YlOrRd", direction = 1,
                       name = "Alien\ncompleteness %") +
  labs(title = "Alien Sampling Completeness per Hexagon (observed / estimated)",
       x = "Longitude", y = "Latitude") +
  theme_minimal(base_size = 12) +
  coord_sf(xlim = c(9.3, 17.2), ylim = c(46.3, 49.1))

gridExtra::grid.arrange(p1, p2, p3, ncol = 1)

Per-Habitat Estimated Richness

Same approach but grouping by EUNIS habitat type rather than hexagon.

# Define habitat variables (also used later in section 16)
habitat    <- header$Eunis_lvl1
hab_counts <- table(habitat)
keep_habs  <- names(hab_counts[hab_counts >= 100])
keep_habs  <- keep_habs[!keep_habs %in% c("", "~")]

rds_hab_sac <- file.path(comp_dir, "habitat_sac_estimates.rds")

if (file.exists(rds_hab_sac)) {
  cat("Loading cached per-habitat SAC estimates...\n")
  hab_estimates <- readRDS(rds_hab_sac)
} else {
  hab_estimates <- rbindlist(lapply(keep_habs, function(h) {
    idx <- which(habitat == h)
    n <- length(idx)

    result <- tryCatch({
      sac_nat <- spacc(native_mat[idx, , drop = FALSE],
                       coords[idx, ], n_seeds = 10, method = "knn",
                       distance = "haversine", seed = 42)
      sac_ali <- spacc(alien_mat[idx, , drop = FALSE],
                       coords[idx, ], n_seeds = 10, method = "knn",
                       distance = "haversine", seed = 42)

      fit_nat <- tryCatch(extrapolate(sac_nat, model = "michaelis-menten"),
                          error = function(e) NULL)
      fit_ali <- tryCatch(extrapolate(sac_ali, model = "michaelis-menten"),
                          error = function(e) NULL)

      obs_native <- ncol(native_mat[idx, colSums(native_mat[idx, , drop = FALSE]) > 0,
                                     drop = FALSE])
      obs_alien  <- ncol(alien_mat[idx, colSums(alien_mat[idx, , drop = FALSE]) > 0,
                                    drop = FALSE])

      data.table(
        habitat     = h,
        n_plots     = n,
        obs_native  = obs_native,
        obs_alien   = obs_alien,
        est_native  = if (!is.null(fit_nat)) fit_nat$asymptote else NA_real_,
        est_alien   = if (!is.null(fit_ali)) fit_ali$asymptote else NA_real_,
        completeness_native = if (!is.null(fit_nat)) obs_native / fit_nat$asymptote * 100 else NA,
        completeness_alien  = if (!is.null(fit_ali)) obs_alien / fit_ali$asymptote * 100 else NA
      )
    }, error = function(e) {
      data.table(habitat = h, n_plots = n,
                 obs_native = NA, obs_alien = NA,
                 est_native = NA, est_alien = NA,
                 completeness_native = NA, completeness_alien = NA)
    })
    cat(sprintf("  %s: %d plots, est_native=%.0f, est_alien=%.0f\n",
                h, n, result$est_native, result$est_alien))
    result
  }))
  saveRDS(hab_estimates, rds_hab_sac)
}
## Loading cached per-habitat SAC estimates...
print(hab_estimates)
##    habitat n_plots obs_native obs_alien est_native   est_alien
##     <char>   <int>      <int>     <int>      <num>       <num>
## 1:       P     223        287         2    430.150    4.722158
## 2:       Q    1102        761        43   1059.577   87.765180
## 3:       R   14193       2100       174   2242.250  263.673285
## 4:       S    1109       1361        67   1635.407 1099.594735
## 5:       T    7001       1583       135   1803.650  201.062973
## 6:       U     327        834        56   1456.727  560.110861
## 7:       V    4642        875       195   1096.790  280.025930
##    completeness_native completeness_alien
##                  <num>              <num>
## 1:            66.72092          42.353520
## 2:            71.82113          48.994373
## 3:            93.65592          65.990758
## 4:            83.22087           6.093154
## 5:            87.76647          67.143143
## 6:            57.25165           9.998021
## 7:            79.77827          69.636408

8. Methods Comparison

Compare kNN (spatially structured) vs random (null model) accumulation. We use the 1990s decade (largest chunk) for speed.

idx_90s <- which(header$decade == "1990s")

rds_methods <- file.path(comp_dir, "methods_comparison.rds")
if (file.exists(rds_methods)) {
  m <- readRDS(rds_methods)
  sac_knn_m <- m$knn; sac_random_m <- m$random
  rm(m)
} else {
  sac_knn_m    <- spacc(all_mat[idx_90s, , drop = FALSE], coords[idx_90s, ],
                        method = "knn",    n_seeds = 50,
                        distance = "haversine", seed = 42)
  sac_random_m <- spacc(all_mat[idx_90s, , drop = FALSE], coords[idx_90s, ],
                        method = "random", n_seeds = 50,
                        distance = "haversine", seed = 42)
  saveRDS(list(knn = sac_knn_m, random = sac_random_m), rds_methods)
}

methods_combined <- c(kNN = sac_knn_m, Random = sac_random_m)
plot(methods_combined) + ggtitle("kNN vs Random (1990s subset)")


9. Hill Numbers

Track effective species diversity (q = 0, 1, 2) as sites accumulate. Using 1990s decade.

rds_hill <- file.path(comp_dir, "hill.rds")
if (file.exists(rds_hill)) {
  hill <- readRDS(rds_hill)
} else {
  hill <- spaccHill(all_mat[idx_90s, , drop = FALSE], coords[idx_90s, ],
                    q = c(0, 1, 2), n_seeds = 30,
                    method = "knn", distance = "haversine", seed = 42)
  saveRDS(hill, rds_hill)
}
print(hill)
## spacc Hill numbers: 8814 sites, 2700 species, 30 seeds
## Orders (q): 0, 1, 2
## Method: knn
plot(hill)

9b. Hill Numbers: Native vs Alien

Compare Hill number accumulation separately for native and alien species.

rds_hill_na <- file.path(comp_dir, "hill_native_alien.rds")
if (file.exists(rds_hill_na)) {
  hill_na <- readRDS(rds_hill_na)
} else {
  hill_native <- spaccHill(native_mat[idx_90s, , drop = FALSE], coords[idx_90s, ],
                           q = c(0, 1, 2), n_seeds = 30,
                           method = "knn", distance = "haversine", seed = 42)
  hill_alien  <- spaccHill(alien_mat[idx_90s, , drop = FALSE], coords[idx_90s, ],
                           q = c(0, 1, 2), n_seeds = 30,
                           method = "knn", distance = "haversine", seed = 42)
  hill_na <- list(native = hill_native, alien = hill_alien)
  saveRDS(hill_na, rds_hill_na)
}
cat("Native Hill:", class(hill_na$native), "\n")
## Native Hill: spacc_hill
cat("Alien Hill:", class(hill_na$alien), "\n")
## Alien Hill: spacc_hill

10. Beta Diversity Accumulation

Track how turnover and nestedness change as the sampling window expands.

rds_beta <- file.path(comp_dir, "beta.rds")
if (file.exists(rds_beta)) {
  beta <- readRDS(rds_beta)
} else {
  beta <- spaccBeta(all_mat[idx_90s, , drop = FALSE], coords[idx_90s, ],
                    n_seeds = 30, index = "sorensen",
                    distance = "haversine", seed = 42)
  saveRDS(beta, rds_beta)
}
print(beta)
## spacc beta diversity: 8814 sites, 30 seeds
## Index: sorensen, Method: knn
## Mean final beta: 0.976 (turnover: 0.000, nestedness: 0.976)
plot(beta)


11. Coverage-Based Rarefaction

How many sites do we need for 90%, 95%, 99% completeness?

rds_cov <- file.path(comp_dir, "coverage.rds")
if (file.exists(rds_cov)) {
  cov <- readRDS(rds_cov)
} else {
  cov <- spaccCoverage(all_mat[idx_90s, , drop = FALSE], coords[idx_90s, ],
                       n_seeds = 30,
                       distance = "haversine", seed = 42)
  saveRDS(cov, rds_cov)
}
print(cov)
## spacc coverage: 8814 sites, 2700 species, 30 seeds
## Mean final coverage: 99.9%
## Mean final richness: 2053.0 species
plot(cov)

# Interpolate to standard coverage targets
interp <- interpolateCoverage(cov, target = c(0.90, 0.95, 0.99))
cat("\nSites needed for target coverage:\n")
## 
## Sites needed for target coverage:
print(round(colMeans(interp, na.rm = TRUE), 1))
##   C90   C95   C99 
## 134.1 192.9 463.1

12. Per-Site Metrics & Spatial Map

Compute accumulation metrics for every site as a starting point. Using 1990s subset.

rds_metrics <- file.path(comp_dir, "metrics.rds")
if (file.exists(rds_metrics)) {
  metrics <- readRDS(rds_metrics)
} else {
  metrics <- spaccMetrics(all_mat[idx_90s, , drop = FALSE], coords[idx_90s, ],
                          metrics = c("slope_10", "half_richness", "auc"),
                          method = "knn", distance = "haversine")
  saveRDS(metrics, rds_metrics)
}
print(metrics)
## spacc_metrics: 8814 sites, 2700 species
## Method: knn
## Metrics: slope_10, half_richness, auc
summary(metrics)
## Metric summary:
##   slope_10: mean=0.82, sd=0.17, range=[0.47, 1.23]
##   half_richness: mean=2050.14, sd=486.35, range=[1204.00, 3454.00]
##   auc: mean=1585.70, sd=38.80, range=[1503.78, 1658.52]
plot(metrics, type = "points")


13. Diversity Partitioning

Decompose regional diversity into alpha, beta, and gamma components.

rds_part <- file.path(comp_dir, "partition.rds")
if (file.exists(rds_part)) {
  partition <- readRDS(rds_part)
} else {
  partition <- diversityPartition(all_mat[idx_90s, , drop = FALSE], q = c(0, 1, 2),
                                  coords = data.frame(x = coords$x[idx_90s], y = coords$y[idx_90s]))
  saveRDS(partition, rds_part)
}
print(partition)
## Alpha-Beta-Gamma Diversity Partitioning
## 8814 sites, 2700 species
## 
##  q alpha  beta   gamma
##  0 30.54 67.22 2053.00
##  1 26.54 29.17  774.29
##  2 21.09 23.27  490.89
## 
## Interpretation:
##   Alpha = mean effective species per site
##   Beta  = effective number of communities (1 to n_sites)
##   Gamma = regional effective species (gamma = alpha x beta)
plot(partition)

Alpha diversity per site

rds_alpha <- file.path(comp_dir, "alpha.rds")
if (file.exists(rds_alpha)) {
  alpha <- readRDS(rds_alpha)
} else {
  alpha <- alphaDiversity(all_mat[idx_90s, , drop = FALSE], q = c(0, 1, 2),
                          coords = data.frame(x = coords$x[idx_90s], y = coords$y[idx_90s]))
  saveRDS(alpha, rds_alpha)
}
print(alpha)
## spacc alpha diversity: 8814 sites, 2700 species
## Orders (q): 0, 1, 2
## Coordinates: available
plot(alpha)


14. Analytical Benchmarks

Compare simulation-based curves to exact analytical expectations. Using 1990s subset.

cole <- coleman(all_mat[idx_90s, , drop = FALSE])
mt   <- mao_tau(all_mat[idx_90s, , drop = FALSE])

plot(sac_knn_m)
# Overlay analytical curves for comparison

15. Grouped Accumulation by Status

Use the groups argument to run a single accumulation with automatic native/alien splitting. Using 1990s subset.

rds_grouped <- file.path(comp_dir, "grouped_status.rds")
if (file.exists(rds_grouped)) {
  sac_grouped <- readRDS(rds_grouped)
} else {
  sac_grouped <- spacc(all_mat[idx_90s, , drop = FALSE], coords[idx_90s, ],
                       n_seeds = 50, method = "knn",
                       distance = "haversine", groups = status_vec, seed = 42)
  saveRDS(sac_grouped, rds_grouped)
}
plot_grouped_normalized(sac_grouped,
                        title = "Grouped by Status (normalized)")


16. Habitat-Level Analysis (EUNIS)

Compare accumulation curves across broad habitat types (EUNIS level 1). We subset to habitats with at least 100 plots.

habitat <- header$Eunis_lvl1

hab_counts <- table(habitat)
keep_habs  <- names(hab_counts[hab_counts >= 100])
keep_habs  <- keep_habs[!keep_habs %in% c("", "~")]  # drop junk codes
cat(sprintf("Habitats with >= 100 plots: %s\n", paste(keep_habs, collapse = ", ")))
## Habitats with >= 100 plots: P, Q, R, S, T, U, V
cat("Plot counts:\n")
## Plot counts:
print(hab_counts[keep_habs])
## habitat
##     P     Q     R     S     T     U     V 
##   223  1102 14193  1109  7001   327  4642
rds_hab <- file.path(comp_dir, "habitat_curves.rds")
if (file.exists(rds_hab)) {
  hab_results <- readRDS(rds_hab)
} else {
  hab_results <- lapply(keep_habs, function(h) {
    idx <- which(habitat == h)
    spacc(all_mat[idx, , drop = FALSE],
          coords[idx, ],
          n_seeds = 50, method = "knn",
          distance = "haversine", seed = 42)
  })
  names(hab_results) <- keep_habs
  saveRDS(hab_results, rds_hab)
}
hab_combined <- do.call(c, hab_results)
plot(hab_combined) + ggtitle("Accumulation by EUNIS habitat (level 1)")

Native vs alien within habitats

Are aliens more concentrated in certain habitats?

hab_alien <- header[, .(PlotObservationID, Eunis_lvl1)]
hab_alien <- merge(hab_alien,
                   species[, .(n_native = sum(STATUS == "native"),
                               n_alien  = sum(STATUS == "neo")),
                           by = PlotObservationID],
                   by = "PlotObservationID")
hab_summary <- hab_alien[Eunis_lvl1 %in% keep_habs,
                         .(mean_native = mean(n_native),
                           mean_alien  = mean(n_alien),
                           alien_pct   = mean(n_alien / (n_native + n_alien)) * 100),
                         by = Eunis_lvl1]
print(hab_summary[order(-alien_pct)])
##    Eunis_lvl1 mean_native mean_alien  alien_pct
##        <char>       <num>      <num>      <num>
## 1:          V   18.189573 2.65812150 14.7518372
## 2:          U   15.547401 0.57186544  2.7899982
## 3:          Q   11.684211 0.25045372  2.1949699
## 4:          T   34.986288 0.65119269  1.9088159
## 5:          R   29.732403 0.50806736  1.8607575
## 6:          S   26.117223 0.23805230  0.9541968
## 7:          P    5.834081 0.01793722  0.1002564
rds_hab_na <- file.path(comp_dir, "habitat_native_alien.rds")
if (file.exists(rds_hab_na)) {
  hab_na_results <- readRDS(rds_hab_na)
} else {
  hab_na_results <- lapply(keep_habs, function(h) {
    nm <- native_mat[which(habitat == h), , drop = FALSE]
    am <- alien_mat[which(habitat == h), , drop = FALSE]
    nm <- nm[, colSums(nm) > 0, drop = FALSE]
    am <- am[, colSums(am) > 0, drop = FALSE]
    if (ncol(nm) < 2 || ncol(am) < 2 || nrow(nm) < 3) return(NULL)
    idx <- which(habitat == h)
    list(
      native = spacc(nm, coords[idx, ],
                     n_seeds = 30, method = "knn",
                     distance = "haversine", seed = 42),
      alien  = spacc(am, coords[idx, ],
                     n_seeds = 30, method = "knn",
                     distance = "haversine", seed = 42)
    )
  })
  names(hab_na_results) <- keep_habs
  saveRDS(hab_na_results, rds_hab_na)
}

for (h in keep_habs) {
  if (is.null(hab_na_results[[h]])) next
  print(plot_normalized(Native = hab_na_results[[h]]$native,
                        Alien  = hab_na_results[[h]]$alien,
                        title = sprintf("Habitat %s: Native vs Alien (normalized)", h)))
}


17. Phylogenetic Diversity Accumulation

Track phylogenetic diversity (MPD, MNTD) as the sampling window expands spatially. Using 1990s subset for speed.

library(ape)
tree_cache <- file.path(data_path, "phylo_tree.rds")

if (file.exists(tree_cache)) {
  cat("Loading cached phylogeny...\n")
  tree <- readRDS(tree_cache)
  run_phylo <- TRUE
} else if (!requireNamespace("V.PhyloMaker2", quietly = TRUE)) {
  cat("V.PhyloMaker2 not installed and no cached tree found - skipping.\n",
      "Install with: remotes::install_github('jinyizju/V.PhyloMaker2')\n")
  run_phylo <- FALSE
} else {
  run_phylo <- TRUE
  library(V.PhyloMaker2)

  sp_lookup <- unique(species[, .(species = WFO_TAXON, family = WFO_FAMILY)])
  sp_lookup[, genus := sub(" .*", "", species)]

  cat(sprintf("Placing %d species on mega-tree (first run, will cache)...\n",
              nrow(sp_lookup)))
  phylo_result <- phylo.maker(
    sp.list = as.data.frame(sp_lookup[, .(species, genus, family)]),
    scenarios = "S3"
  )
  tree <- phylo_result$scenario.3
  tree$tip.label <- gsub("_", " ", tree$tip.label)

  saveRDS(tree, tree_cache)
  cat(sprintf("Tree saved to %s\n", tree_cache))
}
## Loading cached phylogeny...
if (run_phylo) {
  # Reload matrices for subsetting (freed above)
  mats <- readRDS(file.path(comp_dir, "matrices.rds"))
  all_mat_phylo <- mats$all; native_mat_phylo <- mats$native; alien_mat_phylo <- mats$alien
  rm(mats); invisible(gc())

  in_tree <- colnames(all_mat_phylo) %in% tree$tip.label
  cat(sprintf("Species matched to tree: %d / %d (%.0f%%)\n",
              sum(in_tree), ncol(all_mat_phylo), mean(in_tree) * 100))

  phylo_mat <- all_mat_phylo[idx_90s, in_tree, drop = FALSE]
  tree <- keep.tip(tree, colnames(all_mat_phylo)[in_tree])
}
## Species matched to tree: 2697 / 2700 (100%)
if (run_phylo) {
  rds_phylo <- file.path(comp_dir, "phylo_sac.rds")
  if (file.exists(rds_phylo)) {
    phylo_sac <- readRDS(rds_phylo)
  } else {
    phylo_sac <- spaccPhylo(phylo_mat, coords[idx_90s, ], tree = tree,
                             metric = c("mpd", "mntd"),
                             n_seeds = 30, method = "knn",
                             distance = "haversine", seed = 42)
    saveRDS(phylo_sac, rds_phylo)
  }
  print(phylo_sac)
  plot(phylo_sac)
}
## spacc phylogenetic diversity: 8814 sites, 2697 species, 30 seeds
## Metrics: mpd, mntd

Phylogenetic diversity: native vs alien

if (run_phylo) {
  rds_phylo_na <- file.path(comp_dir, "phylo_native_alien.rds")
  if (file.exists(rds_phylo_na)) {
    pna <- readRDS(rds_phylo_na)
    phylo_native <- pna$native; phylo_alien <- pna$alien
    rm(pna)
  } else {
    native_in_tree <- colnames(native_mat_phylo) %in% tree$tip.label
    alien_in_tree  <- colnames(alien_mat_phylo) %in% tree$tip.label

    phylo_native <- spaccPhylo(native_mat_phylo[idx_90s, native_in_tree, drop = FALSE],
                                coords[idx_90s, ], tree = tree, metric = "mpd",
                                n_seeds = 30, method = "knn",
                                distance = "haversine", seed = 42)
    phylo_alien  <- spaccPhylo(alien_mat_phylo[idx_90s, alien_in_tree, drop = FALSE],
                                coords[idx_90s, ], tree = tree, metric = "mpd",
                                n_seeds = 30, method = "knn",
                                distance = "haversine", seed = 42)
    saveRDS(list(native = phylo_native, alien = phylo_alien), rds_phylo_na)
  }

  cat("Native MPD accumulation:\n"); print(phylo_native)
  cat("\nAlien MPD accumulation:\n"); print(phylo_alien)
}
## Native MPD accumulation:
## spacc phylogenetic diversity: 8814 sites, 2375 species, 30 seeds
## Metrics: mpd
## 
## Alien MPD accumulation:
## spacc phylogenetic diversity: 8814 sites, 322 species, 30 seeds
## Metrics: mpd

18. Functional Diversity Accumulation

Track functional diversity (FDis) as sites accumulate. Using family + status as proxy traits. Using 1990s subset.

if (!requireNamespace("FD", quietly = TRUE)) {
  cat("FD package not installed - skipping functional analysis.\n",
      "Install with: install.packages('FD')\n")
  run_func <- FALSE
} else {
  run_func <- TRUE

  # Reload all_mat for subsetting
  mats <- readRDS(file.path(comp_dir, "matrices.rds"))
  all_mat_func <- mats$all
  rm(mats); invisible(gc())

  sp_info <- unique(species[, .(species = WFO_TAXON, family = WFO_FAMILY,
                                 status = STATUS)])
  sp_info <- sp_info[species %in% colnames(all_mat_func)]
  sp_info <- sp_info[!duplicated(species)]
  setkey(sp_info, species)

  trait_df <- data.frame(
    family = as.factor(sp_info$family),
    status = as.factor(sp_info$status),
    row.names = sp_info$species
  )

  shared_sp <- intersect(colnames(all_mat_func), rownames(trait_df))
  func_mat  <- all_mat_func[idx_90s, shared_sp, drop = FALSE]
  trait_df  <- trait_df[shared_sp, , drop = FALSE]

  # Convert factors to numeric dummy variables for distance computation
  trait_mat <- model.matrix(~ family + status - 1, data = trait_df)
  rownames(trait_mat) <- rownames(trait_df)

  cat(sprintf("Functional analysis: %d species with traits, %d sites\n",
              length(shared_sp), nrow(func_mat)))
  rm(all_mat_func); invisible(gc())
}
## Functional analysis: 2699 species with traits, 8814 sites
if (run_func) {
  rds_func <- file.path(comp_dir, "func_sac.rds")
  if (file.exists(rds_func)) {
    func_sac <- readRDS(rds_func)
  } else {
    func_sac <- spaccFunc(func_mat, coords[idx_90s, ], traits = trait_mat,
                           metric = "fdis",
                           n_seeds = 30, method = "knn",
                           distance = "haversine", seed = 42)
    saveRDS(func_sac, rds_func)
  }
  print(func_sac)
  plot(func_sac)
}
## spacc functional diversity: 8814 sites, 2699 species, 124 traits, 30 seeds
## Metrics: fdis


19. Halo Analysis (Area of Effect)

Use the halo dataset (Austria + surrounding buffer) to see how edge effects influence accumulation.

# Load areaOfEffect countries data (needed for support = "Austria")
data(countries, package = "areaOfEffect", envir = environment())

header_halo  <- fread(file.path(data_path, "austria_header_halo.csv"))
species_halo <- fread(file.path(data_path, "austria_species_halo.csv"))

cat(sprintf("Total plots: %d (core: %d, halo: %d)\n",
            nrow(header_halo),
            sum(header_halo$aoe_class == "core"),
            sum(header_halo$aoe_class == "halo")))
## Total plots: 116413 (core: 52546, halo: 63867)
common_halo <- intersect(header_halo$PlotObservationID,
                         species_halo$PlotObservationID)
header_halo <- header_halo[PlotObservationID %in% common_halo]
coords_halo <- header_halo[, .(x = Longitude, y = Latitude)]
plot_ids_halo <- header_halo$PlotObservationID

halo_mat <- make_pa_matrix(species_halo[, .(PlotObservationID, WFO_TAXON)],
                           plot_ids_halo)
rds_halo <- file.path(comp_dir, "halo.rds")
if (file.exists(rds_halo)) {
  h <- readRDS(rds_halo)
  sac_halo <- h$halo; sac_core <- h$core
  rm(h)
} else {
  sac_halo <- spacc(halo_mat, coords_halo, n_seeds = 30, method = "knn",
                    distance = "haversine", support = "Austria",
                    include_halo = TRUE, seed = 42)
  sac_core <- spacc(halo_mat, coords_halo, n_seeds = 30, method = "knn",
                    distance = "haversine", support = "Austria",
                    include_halo = FALSE, seed = 42)
  saveRDS(list(halo = sac_halo, core = sac_core), rds_halo)
}

halo_combined <- c(`With halo` = sac_halo, `Core only` = sac_core)
plot(halo_combined)


Session Info

sessionInfo()
## R version 4.5.2 (2025-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26200)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: Europe/Luxembourg
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ape_5.8-1           hexify_0.3.8        rnaturalearth_1.2.0
## [4] sf_1.0-24           ggplot2_4.0.1       data.table_1.18.0  
## [7] spacc_0.1.0        
## 
## loaded via a namespace (and not attached):
##  [1] magic_1.6-1             sass_0.4.10             generics_0.1.4         
##  [4] class_7.3-23            KernSmooth_2.23-26      lattice_0.22-7         
##  [7] digest_0.6.39           magrittr_2.0.4          evaluate_1.0.5         
## [10] grid_4.5.2              RColorBrewer_1.1-3      fastmap_1.2.0          
## [13] Matrix_1.7-4            jsonlite_2.0.0          e1071_1.7-17           
## [16] DBI_1.2.3               gridExtra_2.3           mgcv_1.9-3             
## [19] viridisLite_0.4.2       scales_1.4.0            permute_0.9-8          
## [22] ade4_1.7-23             jquerylib_0.1.4         abind_1.4-8            
## [25] cli_3.6.5               rlang_1.1.7             units_1.0-0            
## [28] splines_4.5.2           withr_3.0.2             cachem_1.1.0           
## [31] yaml_2.3.12             vegan_2.7-2             otel_0.2.0             
## [34] geometry_0.5.2          tools_4.5.2             parallel_4.5.2         
## [37] dplyr_1.1.4             vctrs_0.7.0             R6_2.6.1               
## [40] proxy_0.4-29            lifecycle_1.0.5         classInt_0.4-11        
## [43] MASS_7.3-65             cluster_2.1.8.1         pkgconfig_2.0.3        
## [46] RcppParallel_5.1.11-1   pillar_1.11.1           bslib_0.9.0            
## [49] gtable_0.3.6            glue_1.8.0              Rcpp_1.1.1             
## [52] rnaturalearthdata_1.0.0 xfun_0.55               tibble_3.3.1           
## [55] tidyselect_1.2.1        FD_1.0-12.3             knitr_1.51             
## [58] farver_2.1.2            nlme_3.1-168            htmltools_0.5.9        
## [61] rmarkdown_2.30          labeling_0.4.3          compiler_4.5.2         
## [64] S7_0.2.1